## "China's Ideological Spectrum" by Pan and Xu (2018)
## Replication by Chen & Chen 
install.packages("psych")

## set working directory, where the ReadMe.txt is in## 

##########################################################
### Exact Replication ###
##########################################################

### Install Packages
install.packages("lavaan")
install.packages("doParallel","foreach")

########################################################
## CFA analysis: Searching for a better model 
########################################################

### WARNING - this part takes extremely long processing time
### Results can be directly loaded from intermediate file (line 115)

rm(list=ls(all=TRUE)) 
library(lavaan)

## ## load data
load("data/sample10K.RData")
names(d)
table(d$year)

## ## Grouping survey questions
questions <- paste("q", 1:50, sep = "")
## Pan&Xu
f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism
f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market
f3 <- c(32, 41, 42, 43, 46, 47, 48, 49, 50) # traditional values
f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism
f5 <- c(24, 6, 7, 8, 44, 45) # individual freedom
f6 <- c(23, 26, 28, 35, 39) # national interest
f7 <- c(22, 31, 33, 34, 36, 38) # social justice
FSet <- list(f1, f2, f3, f4, f5, f6, f7)

## ## a function that generates lavaan model syntax from a list of nubmers
genModel <- function(model.list) {
  model <- ""
  for (j in 1:length(model.list)) {
    model <- paste(model,
                   paste('factor',j,' =~ ',
                         paste(paste('q', model.list[[j]], sep=""),
                               collapse = " + "), sep = ""),
                   "\n", sep = "") 
  }
  return(model)
}

## ## first, search for the best combination among the three base categories
PSet <- matrix(NA, 0, 7)
colnames(PSet) <- paste("assign",1:7, sep = "")

## ## 877 possibilities
c1 <- 1
for (c2 in 1:2) {
  for (c3 in 1:(max(c1,c2)+1)) {
    for (c4 in 1:(max(c1,c2,c3)+1)) {
      for (c5 in 1:(max(c1,c2,c3,c4)+1)) {
        for (c6 in 1:(max(c1,c2,c3,c4,c5)+1)) {
          for (c7 in 1:(max(c1,c2,c3,c4,c5,c6)+1)) {
            PSet <- rbind(PSet, c(c1, c2, c3, c4, c5, c6, c7))
          }
        }
      }
    } 
  }
}
results <- matrix(NA, dim(PSet)[1], 6)
colnames(results) <- c("converged","post.check","chisq", "cfi", "tli", "rmsea")

## ## parallel computing: THIS'S GONNA TAKE A WHILE
## ## jump to line 113 to see the results

library(doParallel)
library(foreach)
cl <- makeCluster(10)
registerDoParallel(cl)
system.time(
  results <- foreach (i=1:dim(PSet)[1], .combine = rbind, .packages = c("lavaan"),
                      .inorder = FALSE) %dopar% {
                        model.list <- list()
                        for (j in 1:7) {
                          if (j %in% PSet[i, ]) {
                            model.list <- c(model.list, list(unique(unlist(FSet[which(PSet[i, ] == j)]))))
                          }
                        }
                        suppressWarnings(fit <- cfa(genModel(model.list),
                                                    ordered = questions,
                                                    data=d[,questions],
                                                    std.lv = TRUE))
                        converged <- inspect(fit, c("converged"))
                        if (converged == TRUE) {
                          post.cov <- suppressWarnings(inspect(fit, c("post.check")))
                          out <- c("converged" = converged, "post.cov" = post.cov,
                                   fitMeasures(fit, c("chisq","cfi", "tli","rmsea"))) 
                        } else {
                          out <- c(0, rep(NA, 5))
                        }
                        return(out)
                      })
stopCluster(cl)
colnames(results) <- c("converged", "post.check","chisq", "cfi", "tli", "rmsea")

ndim <- apply(PSet[,1:7],1,max)
results.final <- cbind.data.frame(PSet, results, ndim)
results.final$id <- 1:dim(results.final)[1]
rownames(results.final) <- NULL
head(results.final)

save(results.final, file = "output/result_cfa_search.RData")


########### Show Results ###################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_search.RData")
post.cov <- which(results.final$post.check == 1)
good.results <- results.final[post.cov,]
(best.id<-which.min(good.results$chisq))
which.max(good.results$cfi)
which.max(good.results$tli)
which.min(good.results$rmsea)
print(good.results[best.id, ])

## Assignment: 1, 2, 2, 3, 1, 2, 2
## ------------------------------------
f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism
f5 <- c(24, 6, 7, 8, 41, 44, 45, 50) # individual freedom
## ------------------------------------
f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market
f3 <- c(32, 42, 43, 46, 47, 48, 49) # traditional values
f6 <- c(23, 26, 28, 35, 39) # national interest
f7 <- c(22, 31, 33, 34, 36, 38) # social justice
## ------------------------------------
f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism

## best models
post.cov <- which(results.final$post.check == 1)
length(post.cov) ## 92 have postive definite covariance matrix
length(post.cov)/dim(results.final)[1] ## 10% of the models
good.results <- results.final[post.cov,]

head(good.results)
table(good.results$ndim)
## 1  2  3 
## 1 39 52 

##############
## Table 1
##############

## The best model: 3 dimensional
library(plyr)
best.results <- ddply(good.results, .(ndim), function(x){
  data.frame(x[which.min(x$chisq),])
})
(best.id<-which.min(good.results$chisq))
which.max(good.results$cfi)
which.max(good.results$tli)
which.min(good.results$rmsea)
print(good.results[best.id, ])
print(best.results[c(3,2,1),])
## p-values
1 - pchisq(301, 2)
1 - pchisq(2180, 3)

##############
## Figure 5
##############

pdf("graphs/cfa_chisq.pdf")
par(mar = c(4, 4, 1, 1))
plot(x = jitter(results.final$ndim[-post.cov]),
     y = results.final$chisq[-post.cov],
     xlim = c(0.5,7.5), ylim = c(64500,68100),
     col = "gray80", xlab = "", ylab = "", cex.axis = 1.2) 
points(jitter(results.final$ndim[setdiff(post.cov,best.results$id)]),
       results.final$chisq[setdiff(post.cov,best.results$id)],
       pch = 1, col = "gray30")
points(best.results$ndim, best.results$chisq,
       pch = 16, cex = 1.5, col = 1)
text(1, 67800, "Model C", cex = 1.2)
text(2, 65900, "Model B", cex = 1.2)
text(3, 65580, "Model A", cex = 1.2)
legend("topright", c("Valid","Invalid (neg. def. cov.)"),
       col = c("gray10","gray70"), cex = 1.3,
       pch = 1, bty = "n")
mtext("# Latent Factors", 1, 2.5, cex = 1.6)
mtext("Chi-squared", 2, 2.5, cex = 1.6)
graphics.off()

pdf("graphs/cfa_rmesa.pdf")
par(mar = c(4, 4, 1, 1))
plot(x = jitter(results.final$ndim[-post.cov]),
     y = results.final$rmsea[-post.cov],
     xlim = c(0.5,7.5), ylim = c(0.07385, 0.07560),
     col = "gray80", xlab = "", ylab = "", cex.axis = 1.2) 
points(jitter(results.final$ndim[setdiff(post.cov,best.results$id)]),
       results.final$rmsea[setdiff(post.cov,best.results$id)],
       pch = 1, col = "gray30")
points(best.results$ndim, best.results$rmsea,
       pch = 16, cex = 1.5, col = 1)
text(1, 0.0753, "Model C", cex = 1.2)
text(2, 0.07425, "Model B", cex = 1.2)
text(3, 0.07415, "Model A", cex = 1.2)
legend("topright", c("Valid","Invalid (neg. def. cov.)"),
       col = c("gray10","gray70"), cex = 1.3,
       pch = 1, bty = "n")
mtext("# Latent Factors", 1, 2.5, cex = 1.6)
mtext("RMSEA", 2, 2.5, cex = 1.6)
graphics.off()




